#windowsFonts(font = windowsFont('Helvetica'))
crs = 4326
library(vroom)
library(sf)
library(terra)
library(dplyr)
library(spData)
library(mapview)
library(geosphere)
library(sp)
library(rgeos)
library(ggplot2)
library(ggmap)
library(kableExtra)
library(tidyverse)
library(data.table)
#remotes::install_github("CityOfPhiladelphia/rphl")
library(rphl)
library(lubridate)
library(furrr)
library(tidycensus)
library(rgdal)
library(furrr)
library(mapview)
library(NbClust)
library(cluster)
library(psych)
ll <- function(dat, proj4 = 4326){st_transform(dat, proj4)}
census_api_key("b33ec1cb4da108659efd12b3c15412988646cbd8", overwrite = TRUE)
root.dir = "https://raw.githubusercontent.com/urbanSpatial/Public-Policy-Analytics-Landing/master/DATA/"
source("https://raw.githubusercontent.com/urbanSpatial/Public-Policy-Analytics-Landing/master/functions.r")
plotTheme <- function(base_size = 9, title_size = 10){
theme(
text = element_text( color = "black"),
plot.title = element_text(size = title_size, colour = "black", hjust = 0.5),
plot.subtitle = element_text( face = 'italic',
size = base_size, colour = "black", hjust = 0.5),
plot.caption = element_text( hjust=0),
axis.ticks = element_blank(),
panel.background = element_blank(),
panel.grid.major = element_line("grey80", size = 0.01),
panel.grid.minor = element_blank(),
panel.border = element_rect(colour = "black", fill=NA, size=.5),
strip.background = element_blank(),
strip.text = element_text( size=9),
axis.title = element_text( size=9),
axis.text = element_text( size=7),
axis.text.y = element_text( size=7),
plot.background = element_blank(),
legend.background = element_blank(),
legend.title = element_text( colour = "black", face = "italic", size = 9),
legend.text = element_text( colour = "black", face = "italic", size = 7),
strip.text.x = element_text( size = 9),
legend.key.size = unit(.5, 'line')
)
}
mapTheme <- function(base_size = 9, title_size = 10){
theme(
text = element_text( color = "black"),
plot.title = element_text(size = title_size, colour = "black", hjust = 0.5),
plot.subtitle = element_text( face = 'italic',
size = base_size, colour = "black", hjust = 0.5),
plot.caption = element_text( hjust=0),
axis.ticks = element_blank(),
panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
strip.background = element_blank(),
strip.text = element_text(size=base_size),
axis.title = element_text( size=9),
axis.text = element_blank(),
axis.text.y = element_blank(),
plot.background = element_blank(),
legend.background = element_blank(),
legend.title = element_text( colour = "black", face = "italic", size = 9),
legend.text = element_text( colour = "black", face = "italic", size = 7),
strip.text.x = element_text(size=base_size),
legend.key.size = unit(.5, 'line')
)
}
palette9 <- c('#315d7f', '#4f5d7f', '#6d5c7e', '#a36681', '#d96f83', '#f2727f', '#f6928a', '#f8a28f', '#f9b294')
palette7 <- c('#315d7f', '#4f5d7f', '#6d5c7e', '#d96f83', '#f2727f', '#f6928a', '#f9b294')
palette5 <- c("#f9b294","#f2727f","#c06c86","#6d5c7e","#315d7f")
palette4 <- c("#f9b294","#f2727f","#c06c86","#6d5c7e")
palette2 <- c("#f9b294","#f2727f")
palette1_main <- "#F2727F"
palette1_assist <- '#F9B294'
xxxxXXXXX — from Jeff
pprDistrict <- st_read('https://opendata.arcgis.com/datasets/0cdc4a1e86c6463b9600f9d9fca39875_0.geojson') %>%
st_transform(crs)
pprServiceArea <- read_sf(dsn="data/FromPPR/PPR_Service_Areas_2021/PPR_Service_Areas_2021.shp")%>%
st_transform(crs = crs)
base_map <- get_map(location = unname(st_bbox(ll(st_buffer(st_union(pprDistrict),11000)))),maptype = "terrian")
ggmap(base_map) +
geom_sf(data=ll(st_union(pprDistrict)),color="black",size=2,fill = "transparent",inherit.aes = FALSE)+
geom_sf(data=ll(pprDistrict),color='black',size=2,fill = "transparent",inherit.aes = FALSE)+
geom_sf(data=ll(pprServiceArea),color='black',size=1,fill = "transparent",inherit.aes = FALSE)+
geom_sf(data=ll(pprDistrict %>% filter(DISTRICTID %in% c(7,8,9))),color=palette1_main,size=2,fill = "transparent",inherit.aes = FALSE)+
geom_sf(data=ll(pprServiceArea %>% filter(PPR_DIST %in% c(7,8,9))),color=palette1_main,size=1,fill = "transparent",inherit.aes = FALSE)+
labs(title = "",
subtitle = "",
x="",y="")+
mapTheme()
Figure. Locations of PPR Districts and Service Areas
The whole Philadelphia is divided into 10 PPR districts, each district unit is divided into several service area units for the sake of administration. the project mainly focus on the District 7、8、9, The pink lines in the map are the services areas in District 7,8,9. Note: The 9 district does not include the Smith Area.
pprProperties <- st_read('https://opendata.arcgis.com/datasets/d52445160ab14380a673e5849203eb64_0.geojson')%>%
st_transform(crs)
property <- st_join(st_centroid(pprProperties),pprServiceArea,left=TRUE) %>%
st_drop_geometry() %>%
left_join(pprProperties %>% dplyr::select(OBJECTID,geometry),by='OBJECTID') %>%
st_sf() %>%
st_transform(crs = crs) %>%
dplyr::select(-Shape__Length,-Shape__Area,-Shape_Leng,-Shape_Area) %>%
rename('ServiceAreaID' = 'INFO')
ggplot() +
geom_sf(data=property,color=palette1_main,fill = palette1_main)+
geom_sf(data=st_union(pprDistrict),color="black",size=1,fill = "transparent")+
geom_sf(data=pprDistrict,color="black",size=0.7,linetype ="dashed",fill = "transparent")+
geom_sf(data=pprDistrict %>% filter(DISTRICTID %in% c(7,8,9)),color="black",size=1,fill = "transparent")+
labs(title = "",
subtitle = "",
x="",y="")+
mapTheme()
Figure. Locations of PPR Properties
The activities hold in PPR properties are recorded in two systems: the program schedules and the permit. With the former, the PPR staffing arrange many activities seasonally in different PPR facilities. With the latter, people outside the PPR apply for conducting activities and reserve space in Parks & Recreation areas. In 2021, there are 4376 times of activities recorded in program schedules, covering 7 categories of After School, Athletic, Camp, Cultural, Educational, Environmental/Outdoor, and other activities, while 3861 times of activities are recorded in the permit system.
permit2021 <- vroom("data/FromPPR/PPR-recreation-permits-2021.csv")
program2021 <- vroom("data/FromPPR/PPR-programs-attended-2021-with-schedules.csv")
facilityID <- read.csv("data/FromPPR/tblFacility_to_PPR_Properties.csv")
# define date, filter by attendance date
program2021.clean <- program2021 %>%
mutate(AttendanceWeekDate = mdy(AttendanceWeekDate),
DateFrom = mdy(DateFrom),
DateTo = mdy(DateTo)) %>%
filter(AttendanceWeekDate > DateFrom & AttendanceWeekDate < DateTo)
# original data is recorded by week, here we change it into being recorded by occurence
program2021.clean <- separate(program2021.clean, Days,into= c("1","2","3","4","5","6","7"))
program2021.clean <- program2021.clean %>%
gather(colNames, weekday, 15:21) %>%
select(-colNames) %>%
na.omit(cols='weekday')%>%
mutate(AttendenceRealDate = case_when(
weekday == "Monday" ~ AttendanceWeekDate,
weekday == "Tuesday" ~ AttendanceWeekDate+1,
weekday == "Wednesday" ~ AttendanceWeekDate+2,
weekday == "Thursday" ~ AttendanceWeekDate+3,
weekday == "Friday" ~ AttendanceWeekDate+4,
weekday == "Saturday" ~ AttendanceWeekDate+5,
weekday == "Sunday" ~ AttendanceWeekDate+6,
))
# join property,permit and program data
program2021.join <- left_join(program2021.clean, facilityID, by =c("FacilityID" = "FacilityID")) %>%
left_join(., property, by =c("PPR_Properties_ObjectID"="OBJECTID"))
permit2021.join <- left_join(permit2021, facilityID, by =c("FacilityID")) %>%
left_join(., property, by =c("PPR_Properties_ObjectID"="OBJECTID"))
# filter the failed joining items
program2021.otherDIstrict <- program2021.join %>% filter(is.na(PPR_Properties_ObjectID))
program2021.join <- program2021.join %>% drop_na(PPR_Properties_ObjectID)
permit2021.otherDIstrict <- permit2021.join %>% filter(is.na(PPR_Properties_ObjectID))
permit2021.join <- permit2021.join %>% drop_na(PPR_Properties_ObjectID)
# Wrangle "program2021.join", and extract month attendance
Facility_Program <- program2021.join %>%
select(Facility,ActvityTypeCategory,ActivityType,
AttendanceWeekDate,
RegisteredIndividualsCount,
PPR_DISTRICT, PUBLIC_NAME, PARENT_NAME,geometry) %>%
mutate(month = case_when(month(AttendanceWeekDate)==1 ~ "Jan",
month(AttendanceWeekDate)==2 ~ "Feb",
month(AttendanceWeekDate)==3 ~ "Mar",
month(AttendanceWeekDate)==4 ~ "Apr",
month(AttendanceWeekDate)==5 ~ "May",
month(AttendanceWeekDate)==6 ~ "Jun",
month(AttendanceWeekDate)==7 ~ "Jul",
month(AttendanceWeekDate)==8 ~ "Aug",
month(AttendanceWeekDate)==9 ~ "Sep",
month(AttendanceWeekDate)==10 ~ "Oct",
month(AttendanceWeekDate)==11 ~ "Nov",
month(AttendanceWeekDate)==12 ~ "Dec")) %>%
distinct(.keep_all = FALSE)
Facility_Program_otherDistricts <- program2021.otherDIstrict %>%
select(Facility,ActvityTypeCategory,ActivityType,
AttendanceWeekDate,
RegisteredIndividualsCount,
PUBLIC_NAME, PARENT_NAME,geometry) %>%
mutate(month = case_when(month(AttendanceWeekDate)==1 ~ "Jan",
month(AttendanceWeekDate)==2 ~ "Feb",
month(AttendanceWeekDate)==3 ~ "Mar",
month(AttendanceWeekDate)==4 ~ "Apr",
month(AttendanceWeekDate)==5 ~ "May",
month(AttendanceWeekDate)==6 ~ "Jun",
month(AttendanceWeekDate)==7 ~ "Jul",
month(AttendanceWeekDate)==8 ~ "Aug",
month(AttendanceWeekDate)==9 ~ "Sep",
month(AttendanceWeekDate)==10 ~ "Oct",
month(AttendanceWeekDate)==11 ~ "Nov",
month(AttendanceWeekDate)==12 ~ "Dec")) %>%
distinct(.keep_all = FALSE)
# Aggregate weekly visites
WeekVisit <- aggregate(
RegisteredIndividualsCount ~ AttendanceWeekDate + ActvityTypeCategory + PPR_DISTRICT, data = Facility_Program, FUN = sum)
In the figure below, red legends show the locations of facilities with program schedules while orange legends show the distributions of facilities with permit records. One limitation should be pointed out here is that some facilities didn’t spatial joined the geographic information successfully, so, there are more facilities with program and permit compared to the number showing on the figure.
ggplot()+
geom_sf(data=pprServiceArea %>% filter(PPR_DIST %in% c(7,8,9)),color='black',size=0.25,linetype ="dashed", fill= "transparent")+
geom_sf(data=st_union(pprServiceArea %>% filter(PPR_DIST ==7)),color="black",size=1,fill = "transparent")+
geom_sf(data=st_union(pprServiceArea %>% filter(PPR_DIST ==8)),color="black",size=1,fill = "transparent")+
geom_sf(data=st_union(pprServiceArea %>% filter(PPR_DIST ==9)),color="black",size=1,fill = "transparent")+
geom_sf(data=permit2021.join,aes(geometry = geometry),color =palette1_assist,fill = palette1_assist, alpha = 0.7) +
geom_sf(data=program2021.join,aes(geometry = geometry),color = palette1_main,fill = palette1_main,alpha = 0.7)+
labs(title="Facilities w/ Programed (red) & Permited (orange) Activities in Disdrict 7,8 & 9")+
mapTheme()
Figure. Locations of Programs and Permits
The above dataset gives information about PPR programs & permits in 2021, including information like the duration of the program, the attendance of the program etc. But some wrangling are needed for further uses. And the wrangling is taken in the next section. Through above wrangling, we link program data to their based properties,their belonged Service Area.
There are three facilities with program scheduled in District 7: Athletic Recreation Center, Mander Playground, and Marian Anderson Recreation Center. Marian Anderson Recreation Center hold more activities in athletic category, like soccer and baseball. The other two facilities hold more cultural activities, like art and music. Athletic activities were hold from March to November while cultural and after school activities mostly took place in fall.
ggplot()+
geom_sf(data=st_union(pprServiceArea %>% filter(PPR_DIST ==7)),color="black",size=1,fill = "transparent")+
geom_sf(data=pprServiceArea %>% filter(PPR_DIST ==7),color="black",linetype ="dashed",size=1,fill = "transparent")+
geom_sf(data=permit2021.join%>% filter(PPR_DIST ==7),aes(geometry = geometry),color =palette1_assist,fill = palette1_assist, alpha = 0.7) +
geom_sf(data=program2021.join%>% filter(PPR_DIST ==7),aes(geometry = geometry),color = palette1_main,fill = palette1_main,alpha = 0.7)+
labs(title="Facilities w/ Programed (red) & Permited (orange) Activities in Disdrict 7")+
mapTheme()
Figure. Locations of Programs and Permits
ggplot(Facility_Program %>%filter(PPR_DISTRICT == 7)) +
geom_bar(aes(x= Facility,fill = ActvityTypeCategory),position="stack")+
scale_fill_manual(values = palette5)+
labs(y = "Program Frequency", fill="Program Category", title = "Facility & Program Categories in District 7")+
plotTheme()
Figure. Categories of Events in District7
ggplot(Facility_Program %>%filter(PPR_DISTRICT == 7)) +
geom_bar(aes(x= Facility, fill = ActivityType),position="stack")+
scale_fill_manual(values = palette7)+
labs(y = "Program Frequency", fill="Program sub-Category", title = "Facility & Sub-categories in District 7")+
plotTheme()
Figure. Categories of Events in District7
ggplot(WeekVisit %>%filter(PPR_DISTRICT == 7)) +
geom_line(size=0.5,aes(x = AttendanceWeekDate, y = RegisteredIndividualsCount, group = ActvityTypeCategory, colour = ActvityTypeCategory)) +
geom_point(aes(x = AttendanceWeekDate, y = RegisteredIndividualsCount, group = ActvityTypeCategory, colour = ActvityTypeCategory, size = RegisteredIndividualsCount)) +
scale_color_manual(values = palette5)+
scale_size_continuous(range = c(2, 4))+
labs(title = "Visitor Counts by Week and Activity Categories in District 7",
color = "Program Category",
size="Visitor Counts",
x = "Week of the Year",
y = "Visitor Counts")+
plotTheme()
Figure. Categories of Events in District7
There are five facilities with program scheduled in District 8: 48th & Woodland Playground, Christy Recreation Center, Howards S. Morris Recreation Center, Laura Sims Skate House, and Shepard Recreation Center. Laura Sims Skate House hold hockey and ice skating activities from October to May the next year, while Morris Recreation Center hosted more cultural activities like dance as well as athletic activities of gymnastics, tumbling and basketball from October to December.
ggplot()+
geom_sf(data=st_union(pprServiceArea %>% filter(PPR_DIST ==8)),color="black",size=1,fill = "transparent")+
geom_sf(data=pprServiceArea %>% filter(PPR_DIST ==8),color="black",linetype ="dashed",size=1,fill = "transparent")+
geom_sf(data=permit2021.join%>% filter(PPR_DIST ==8),aes(geometry = geometry),color =palette1_assist,fill = palette1_assist, alpha = 0.7) +
geom_sf(data=program2021.join%>% filter(PPR_DIST ==8),aes(geometry = geometry),color = palette1_main,fill = palette1_main,alpha = 0.7)+
labs(title="Facilities w/ Programed (red) & Permited (orange) Activities in Disdrict 8")+
mapTheme()
Figure. Locations of Programs and Permits
ggplot(Facility_Program %>%filter(PPR_DISTRICT == 8)) +
geom_bar(aes(x= Facility,fill = ActvityTypeCategory),position="stack")+
scale_fill_manual(values = palette5)+
labs(y = "Program Frequency", fill="Program Category", title = "Facility & Program Categories in District 8")+
plotTheme()
Figure. Categories of Events in District8
ggplot(Facility_Program %>%filter(PPR_DISTRICT == 8)) +
geom_bar(aes(x= Facility, fill = ActivityType),position="stack")+
scale_fill_manual(values = palette9)+
labs(y = "Program Frequency", fill="Program sub-Category", title = "Facility & Sub-categories in District 8")+
plotTheme()
Figure. Categories of Events in District8
ggplot(WeekVisit %>%filter(PPR_DISTRICT == 8)) +
geom_point(aes(x = AttendanceWeekDate, y = RegisteredIndividualsCount, group = ActvityTypeCategory, colour = ActvityTypeCategory, size = RegisteredIndividualsCount)) +
geom_line(size=0.5,aes(x = AttendanceWeekDate, y = RegisteredIndividualsCount, group = ActvityTypeCategory, colour = ActvityTypeCategory)) +
scale_color_manual(values = palette5)+
scale_size_continuous(range = c(2, 4))+
labs(title = "Visitor Counts by Week and Activity Categories in District 8",
color = "Program Category",
size="Visitor Counts",
x = "Week of the Year",
y = "Visitor Counts")+
plotTheme()
Figure. Categories of Events in District8
There are seven facilities with program scheduled in District 9: Barry Playground and Pool, Cibotti Recreation Center, DiSilvestro Playground, East Passyunk Community Center, Eastwick Regional Playground, and Thomas B. Smith Recreation Center. Athletic activities of basketball and aquatics mostly took place in Barry Playground and Pool and East Passyunk Community Center. Eastwick Regional Playground, and Thomas B. Smith Recreation Center are the most two popular facilities with activities of athletic, after school, cultural, educational, and other categories. Athletic activities took place throughout the whole year, while other category of activities, like older adults and mentoring, were hold in the 2nd half of the year. Cultural activities, like art, dance, music, usually suspended in summer.
ggplot()+
geom_sf(data=st_union(pprServiceArea %>% filter(PPR_DIST ==9)),color="black",size=1,fill = "transparent")+
geom_sf(data=pprServiceArea %>% filter(PPR_DIST ==9),color="black",linetype ="dashed",size=1,fill = "transparent")+
geom_sf(data=permit2021.join%>% filter(PPR_DIST ==9),aes(geometry = geometry),color =palette1_assist,fill = palette1_assist, alpha = 0.7) +
geom_sf(data=program2021.join%>% filter(PPR_DIST ==9),aes(geometry = geometry),color = palette1_main,fill = palette1_main,alpha = 0.7)+
labs(title="Facilities w/ Programed (red) & Permited (orange) Activities in Disdrict 9")+
mapTheme()
Figure. Locations of Programs and Permits
ggplot(Facility_Program %>%filter(PPR_DISTRICT == 9)) +
geom_bar(aes(x= Facility,fill = ActvityTypeCategory),position="stack")+
scale_fill_manual(values = palette7[2:7])+
labs(y = "Program Frequency", fill="Program Category", title = "Facility & Program Categories in District 9")+
plotTheme()
Figure. Categories of Events in District8
ggplot(WeekVisit %>%filter(PPR_DISTRICT == 9)) +
geom_point(aes(x = AttendanceWeekDate, y = RegisteredIndividualsCount, group = ActvityTypeCategory, colour = ActvityTypeCategory, size = RegisteredIndividualsCount)) +
geom_line(size=0.5,aes(x = AttendanceWeekDate, y = RegisteredIndividualsCount, group = ActvityTypeCategory, colour = ActvityTypeCategory)) +
scale_color_manual(values = palette7[2:7])+
scale_size_continuous(range = c(2, 4))+
labs(title = "Visitor Counts by Week and Activity Categories in District 9",
color = "Program Category",
size="Visitor Counts",
x = "Week of the Year",
y = "Visitor Counts")+
plotTheme()
Figure. Categories of Events in District8
There are 27 facilities with program scheduled in other districts of PPR serving areas. We will have further analysis after PPR provide specific sorting information.
ggplot(Facility_Program_otherDistricts) +
geom_bar(aes(x= Facility,fill = ActvityTypeCategory),position="stack")+
scale_fill_manual(values = palette7)+
labs(y = "Program Frequency", fill="Program Category", title = "Facility & Program Categories in District 9")+
plotTheme()+
theme(axis.text.x = element_text(angle = 45, hjust = 1))
Figure. Categories of Events in other Districts
# brand_info <- vroom("data/safegraph/Philadelphia-Camden-WilmingtonPA-NJ-DE-MDMSA-CORE_POI-2021_11-2021-12-17/brand_info.csv")
# core_poi <- vroom("data/safegraph/Philadelphia-Camden-WilmingtonPA-NJ-DE-MDMSA-CORE_POI-2021_11-2021-12-17/core_poi.csv")
#
# monthList = c("01","02","03","04","05","06","07","08","09","10","11")
#
# patternAllMonth = data.frame()
# for (i in monthList){
# currentMonth = vroom(paste("data/safegraph/SafeGraph Data Purchase Dec-16-2021/Philadelphia-Camden-WilmingtonPA-NJ-DE-MDMSA-PATTERNS-2021_",
# i,
# "-2021-12-17/patterns.csv.gz",sep = ""))%>%
# filter(region=="PA")%>%
# mutate(month=paste(i,sep = ""))
# patternAllMonth = rbind(patternAllMonth,currentMonth)
# }
#
# # filter into philly, join with POI data
# safeGraph <- patternAllMonth %>%
# filter(city == "Philadelphia")%>%
# left_join(core_poi %>% dplyr::select(placekey,location_name,top_category,sub_category,naics_code,latitude,longitude),
# by=c("placekey"="placekey","location_name" = "location_name"),keep=FALSE)
#
# # create geometry from lat & lng
# safeGraph.geo <-
# safeGraph %>%
# st_as_sf(coords = c("longitude", "latitude"), crs = crs, agr = "constant", na.fail=FALSE)
# patternAllMonth <- st_read("data/output/patternAllMonth.csv")
#st_write(safeGraph.geo,"data/output/safeGraph.geo.GeoJSON")
safeGraph.geo <- st_read("data/output/safeGraph.geo.GeoJSON",crs = crs)
## Reading layer `safeGraph.geo' from data source
## `C:\PENN\2022\MUSA_801_Practicum\MUSA_801_PPR\data\output\safeGraph.geo.GeoJSON'
## using driver `GeoJSON'
## replacing null geometries with empty geometries
## Simple feature collection with 198218 features and 31 fields (with 11409 geometries empty)
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -75.55742 ymin: 39.85988 xmax: -74.93288 ymax: 40.1344
## Geodetic CRS: WGS 84
# change workers number by yourself
plan(multiprocess, workers = 10)
# keep congeneric bussiness
congenericMoves <-
safeGraph.geo %>%
filter(top_category %in% c("Promoters of Performing Arts, Sports, and Similar Events","Other Amusement and Recreation Industries","Museums, Historical Sites, and Similar Institutions") | str_detect(location_name, "Park") | str_detect(location_name, "Playground") | str_detect(location_name, "Recreation Center")) %>%
filter(str_detect(location_name, "Parking", negate = TRUE))
# Keep only ppr sites
parks <- safeGraph.geo %>%
dplyr::select(placekey, naics_code, location_name) %>%
distinct() %>%
filter(naics_code %in% c(712190, 713990, 713940, 711310) | str_detect(location_name, "Park") | str_detect(location_name, "Playground") | str_detect(location_name, "Recreation Center")) %>%
filter(str_detect(location_name, "Parking", negate = TRUE)) %>%
st_transform(crs = 4326)
PPRmoves <- safeGraph.geo %>%
filter(placekey %in% as.list(parks$placekey))
712190:Nature Parks and Other Similar Institutions; 713990:All Other Amusement and Recreation Industries; 713940: Fitness and Recreational Sports Centers; 711310:Promoters of Performing Arts, Sports, and Similar Events
# join filtered safeGraph place with ppr property
propertyWithPlaceKey <- st_join(property %>% st_buffer(10),parks %>% dplyr::select(placekey, geometry),left=FALSE) %>%
st_drop_geometry() %>%
left_join(property %>% dplyr::select(OBJECTID),by=('OBJECTID'='OBJECTID')) %>%
st_sf() %>%
st_transform(crs=crs) %>%
drop_na(placekey)
# join filtered safeGraph place with ppr programs
program2021.joinWithPlaceKey <-
st_join(program2021.join %>%
st_sf() %>%
st_transform(crs=crs) %>%
st_buffer(10),
parks %>% dplyr::select(placekey, geometry),left=FALSE) %>%
st_drop_geometry() %>%
merge.data.frame(program2021.join %>%
dplyr::select(geometry),
by='row.names')%>%
dplyr::select(-Row.names) %>%
st_sf() %>%
st_transform(crs=crs)
mapview(property)+mapview(st_centroid(propertyWithPlaceKey),col.regions = "green")
In the above map, the polygon is the properties of PPR, the green dots are the successfully join properties with placekey
# unnest visit Count data
# visitCount <-
# PPRmoves %>%
# select(placekey, date_range_start, date_range_end, visits_by_day) %>%
# mutate(date_range_start = as_date(date_range_start),
# date_range_end = as_date(date_range_end)) %>%
# dplyr::select(-date_range_end) %>%
# mutate(visits_by_day = future_map(visits_by_day, function(x){
# unlist(x) %>%
# as_tibble() %>%
# rowid_to_column(var = "day") %>%
# mutate(visits = value) %>%
# dplyr::select(-value)
# })) %>%
# unnest(cols = c("visits_by_day"))
# visitCount <- visitCount %>%
# rename(visitDay = date_range_start) %>%
# mutate(visitDay = day+visitDay-1) %>%
# mutate(month = month(visitDay))
# st_write(visitCount,"data/output/visitCount.GeoJSON")
visitCount <- st_read("data/output/visitCount.GeoJSON",crs=crs)
## Reading layer `visitCount' from data source
## `C:\PENN\2022\MUSA_801_Practicum\MUSA_801_PPR\data\output\visitCount.GeoJSON'
## using driver `GeoJSON'
## replacing null geometries with empty geometries
## Simple feature collection with 341467 features and 5 fields (with 8647 geometries empty)
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -75.26586 ymin: 39.87597 xmax: -74.95775 ymax: 40.13295
## Geodetic CRS: WGS 84
# unnest popularity_by_hour data
# visitHour <-
# PPRmoves %>%
# select(placekey, popularity_by_hour, date_range_start) %>%
# mutate(date_range_start = as_date(date_range_start),
# month = month(date_range_start)) %>%
# dplyr::select(-date_range_start) %>%
# mutate(popularity_by_hour = future_map(popularity_by_hour, function(x){
# unlist(x) %>%
# as_tibble() %>%
# rowid_to_column(var = "hour") %>%
# rename(visit = value)
# }))%>%
# unnest(popularity_by_hour)
#
# st_write(visitHour,"data/output/visitHour.GeoJSON")
visitHour <- st_read("data/output/visitHour.GeoJSON",crs=crs)
## Reading layer `visitHour' from data source
## `C:\PENN\2022\MUSA_801_Practicum\MUSA_801_PPR\data\output\visitHour.GeoJSON'
## using driver `GeoJSON'
## replacing null geometries with empty geometries
## Simple feature collection with 269880 features and 4 fields (with 6840 geometries empty)
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -75.26586 ymin: 39.87597 xmax: -74.95775 ymax: 40.13295
## Geodetic CRS: WGS 84
# unnest the origin-destination data
# originCount <-
# PPRmoves %>%
# select(placekey, visitor_home_cbgs, date_range_start) %>%
# mutate(date_range_start = as_date(date_range_start),
# month = month(date_range_start)) %>%
# dplyr::select(-date_range_start) %>%
# mutate(visitor_home_cbgs = future_map(visitor_home_cbgs, function(x){
# jsonlite::fromJSON(x) %>%
# as_tibble() %>%
# dplyr::select(starts_with("4")) %>%
# gather()
# })) %>%
# unnest(visitor_home_cbgs) %>%
# rename(origin =key ,visitors= value)
#
# st_write(originCount,"data/output/originCount.GeoJSON")
originCount <- st_read("data/output/originCount.GeoJSON",crs=crs)
## Reading layer `originCount' from data source
## `C:\PENN\2022\MUSA_801_Practicum\MUSA_801_PPR\data\output\originCount.GeoJSON'
## using driver `GeoJSON'
## replacing null geometries with empty geometries
## Simple feature collection with 348519 features and 4 fields (with 8681 geometries empty)
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -75.26586 ymin: 39.87597 xmax: -74.95775 ymax: 40.13295
## Geodetic CRS: WGS 84
#unnest the departure - destination data
# departCount <-
# PPRmoves %>%
# select(placekey, visitor_daytime_cbgs, date_range_start) %>%
# mutate(date_range_start = as_date(date_range_start),
# month = month(date_range_start)) %>%
# dplyr::select(-date_range_start) %>%
# mutate(visitor_daytime_cbgs = future_map(visitor_daytime_cbgs, function(x){
# jsonlite::fromJSON(x) %>%
# as_tibble() %>%
# dplyr::select(starts_with("4")) %>%
# gather()
# }))%>%
# unnest(visitor_daytime_cbgs) %>%
# rename(departure =key ,visitors= value)
#
# st_write(departCount,"data/output/departCount.GeoJSON")
departCount <- st_read("data/output/departCount.GeoJSON",crs=crs)
## Reading layer `departCount' from data source
## `C:\PENN\2022\MUSA_801_Practicum\MUSA_801_PPR\data\output\departCount.GeoJSON'
## using driver `GeoJSON'
## replacing null geometries with empty geometries
## Simple feature collection with 299627 features and 4 fields (with 7380 geometries empty)
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -75.26586 ymin: 39.87597 xmax: -74.95775 ymax: 40.13295
## Geodetic CRS: WGS 84
# unnest the bucketed_dwell_times data
# dwellTime <-
# PPRmoves %>%
# select(placekey, bucketed_dwell_times, date_range_start) %>%
# mutate(date_range_start = as_date(date_range_start),
# month = month(date_range_start)) %>%
# dplyr::select(-date_range_start) %>%
# mutate(bucketed_dwell_times = future_map(bucketed_dwell_times, function(x){
# jsonlite::fromJSON(x) %>%
# as_tibble() %>%
# gather()
# }))%>%
# unnest(bucketed_dwell_times) %>%
# rename(dwellTimes =key ,visitors= value)
#
# st_write(dwellTime,"data/output/dwellTime.GeoJSON")
dwellTime <- st_read("data/output/dwellTime.GeoJSON",crs=crs)
## Reading layer `dwellTime' from data source
## `C:\PENN\2022\MUSA_801_Practicum\MUSA_801_PPR\data\output\dwellTime.GeoJSON'
## using driver `GeoJSON'
## replacing null geometries with empty geometries
## Simple feature collection with 78715 features and 4 fields (with 1995 geometries empty)
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -75.26586 ymin: 39.87597 xmax: -74.95775 ymax: 40.13295
## Geodetic CRS: WGS 84
# unnest the related_same_day_brand
# relatedBrand <-
# PPRmoves %>%
# select(placekey, related_same_day_brand, date_range_start) %>%
# mutate(date_range_start = as_date(date_range_start),
# month = month(date_range_start)) %>%
# dplyr::select(-date_range_start) %>%
# mutate(related_same_day_brand = future_map(related_same_day_brand, function(x){
# jsonlite::fromJSON(x) %>%
# as_tibble() %>%
# gather()
# }))
#
# relatedBrand <- relatedBrand %>%
# unnest(related_same_day_brand) %>%
# rename(relatedBrand =key ,visitors= value)
#
# st_write(relatedBrand,"data/output/relatedBrand.GeoJSON")
relatedBrand <- st_read("data/output/relatedBrand.GeoJSON",crs=crs)
## Reading layer `relatedBrand' from data source
## `C:\PENN\2022\MUSA_801_Practicum\MUSA_801_PPR\data\output\relatedBrand.GeoJSON'
## using driver `GeoJSON'
## replacing null geometries with empty geometries
## Simple feature collection with 209840 features and 4 fields (with 5124 geometries empty)
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -75.26586 ymin: 39.87597 xmax: -74.95775 ymax: 40.13295
## Geodetic CRS: WGS 84
# unnest the popularity_by_day data
# visitWeekday <-
# PPRmoves %>%
# select(placekey, popularity_by_day, date_range_start) %>%
# mutate(date_range_start = as_date(date_range_start),
# month = month(date_range_start)) %>%
# dplyr::select(-date_range_start) %>%
# mutate(popularity_by_day = future_map(popularity_by_day, function(x){
# jsonlite::fromJSON(x) %>%
# as_tibble() %>%
# gather()
# }))%>%
# unnest(popularity_by_day) %>%
# rename(visitWeekday =key ,visits= value)
#
# st_write(visitWeekday,"data/output/visitWeekday.GeoJSON")
visitWeekday <- st_read("data/output/visitWeekday.GeoJSON",crs=crs)
## Reading layer `visitWeekday' from data source
## `C:\PENN\2022\MUSA_801_Practicum\MUSA_801_PPR\data\output\visitWeekday.GeoJSON'
## using driver `GeoJSON'
## replacing null geometries with empty geometries
## Simple feature collection with 78715 features and 4 fields (with 1995 geometries empty)
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -75.26586 ymin: 39.87597 xmax: -74.95775 ymax: 40.13295
## Geodetic CRS: WGS 84
Sometimes, SafeGraph will count “Residents” or “passer-bys” of the position in “visits”, which will result in huge bias and mislead the analysis. So, to conclude typical position types of SafeGraph data bias, we choose midnight visitsand dwell time as factors for cluster analysis. We also apply principal component analysis here to assist in understanding characteristics of clusters.
Taking results of both cluster analysis and component analysis, most locations, 1094 out of 1117, belong to cluster 3 with reliable data. Cluster 2 includes 14 places with extremely more night visits and long visits that are more than 2 hours. Cluster 1 consists of 9 locations with many transitory visits that are less than 10 minutes.
# calculate midnight visit
visitHourNight <- visitHour %>%
filter(hour >= 1 | hour <=5) %>%
group_by(placekey) %>%
summarize(nightVisit = sum(visit))
# integrate sg data by midnight visit and dweel time
visitIntegrated <- full_join(visitHourNight %>%
st_drop_geometry(),
dwellTime %>%
group_by(placekey, dwellTimes) %>%
summarize(visitors = sum(visitors)),
by=c('placekey'))
# wide format
visitIntegrated <- visitIntegrated %>%
spread(key = dwellTimes, value = visitors)
# export
# st_write(visitIntegrated, "data/output/visitIntegrated.GeoJSON")
# import
visitIntegrated <- st_read("data/output/visitIntegrated.GeoJSON") %>%
rename(c("Dwell<5"="X.5","Dwell>240"="X.240","Dwell11-20"="X11.20","Dwell121-240"="X121.240","Dwell21-60"="X21.60","Dwell5-10"="X5.10","Dwell61-120"="X61.120" ))
## Reading layer `visitIntegrated' from data source
## `C:\PENN\2022\MUSA_801_Practicum\MUSA_801_PPR\data\output\visitIntegrated.GeoJSON'
## using driver `GeoJSON'
## Simple feature collection with 1117 features and 9 fields (with 29 geometries empty)
## Geometry type: GEOMETRY
## Dimension: XY
## Bounding box: xmin: -75.26586 ymin: 39.87597 xmax: -74.95775 ymax: 40.13295
## Geodetic CRS: WGS 84
visitIntegrated <- visitIntegrated[,c(1,2,3,8,5,7,9,6,4)]
# normalize
visitIntegrated.scale <- scale(visitIntegrated %>%
dplyr::select(-placekey) %>%
st_drop_geometry())
set.seed(1234)
# decide cluster number (only run once)
#nc <- NbClust(visitIntegrated.scale, min.nc=2, max.nc=15, method="kmeans", index="all")
#table(nc$Best.n[1,])
#
# barplot(table(nc$Best.n[1,]),
# xlab="Numer of Clusters", ylab="Number of Criteria",
# main="Number of Clusters Chosen by 26 Criteria")
# Run K-Means cluster
cluster1 <- kmeans(visitIntegrated.scale, 3)
summary(cluster1)
## Length Class Mode
## cluster 1117 -none- numeric
## centers 24 -none- numeric
## totss 1 -none- numeric
## withinss 3 -none- numeric
## tot.withinss 1 -none- numeric
## betweenss 1 -none- numeric
## size 3 -none- numeric
## iter 1 -none- numeric
## ifault 1 -none- numeric
# add cluster number back
visitIntegrated <- visitIntegrated %>%
mutate(cluster = cluster1$cluster)
# mean by cluster
cluster1_mean <- aggregate(visitIntegrated %>%
dplyr::select(-placekey, -cluster) %>%
st_drop_geometry(),
by=list(cluster=visitIntegrated$cluster),
FUN=mean) %>%
left_join(visitIntegrated %>%
st_drop_geometry() %>%
group_by(cluster) %>%
summarize(size = n()),
by="cluster")
kable(cluster1_mean,align = 'c',caption = '<center>Table 1. Mean values of clusters for SafeGraph data <center/>') %>%
kable_classic(full_width = F)%>%
kable_styling(position = "center")%>%
scroll_box(width = "100%", height = "400px")
| cluster | nightVisit | Dwell<5 | Dwell5-10 | Dwell11-20 | Dwell21-60 | Dwell61-120 | Dwell121-240 | Dwell>240 | size |
|---|---|---|---|---|---|---|---|---|---|
| 1 | 172970.00 | 1678.88889 | 15356.1111 | 9700.4444 | 16923.7778 | 11926.0000 | 8894.4444 | 5530.0000 | 9 |
| 2 | 2589580.14 | 340.57143 | 3161.0000 | 4029.0714 | 26152.4286 | 48977.9286 | 64342.5714 | 172938.8571 | 14 |
| 3 | 17164.71 | 50.56856 | 488.9762 | 337.9122 | 824.6335 | 659.4059 | 531.0649 | 926.6846 | 1094 |
## put histograms on the diagonal
panel.hist <- function(x, ...)
{
usr <- par("usr"); on.exit(par(usr))
par(usr = c(usr[1:2], 0, 1.5) )
h <- hist(x, plot = FALSE)
breaks <- h$breaks; nB <- length(breaks)
y <- h$counts; y <- y/max(y)
rect(breaks[-nB], 0, breaks[-1], y, ...)
}
#Color points by groups
my_cols <- palette4[1:3]
pairs(visitIntegrated %>%
st_drop_geometry() %>%
mutate(`Dwel<10`=`Dwell<5`+`Dwell5-10`,
`Dwell11-120` = `Dwell11-20` + `Dwell21-60` + `Dwell61-120`,
`Dwell>120` = `Dwell121-240` + `Dwell>240`) %>%
dplyr::select(nightVisit, `Dwel<10`, `Dwell11-120`, `Dwell>120`),
pch = 19, cex = 1, diag.panel = panel.hist,
col = my_cols[visitIntegrated$cluster],
lower.panel=NULL, panel = panel.smooth)
Figure. Correlation Matrix of SafeGraph Bias
x <- clusplot(visitIntegrated.scale,
cluster1$cluster, color=TRUE, shade=TRUE, main = "",
labels=5, lines=0, stand=TRUE, col.txt=palette5[1:3], col.clus=palette5[1:3], col.p=palette5[5])
Figure. Cluster Plot with Main Component
# decide component number (2)
fa.parallel(as.data.frame(visitIntegrated.scale), fa = 'pc', n.iter = 100, show.legend = FALSE)
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Parallel analysis suggests that the number of factors = NA and the number of components = 2
# principal component analysis
set.seed(1234)
cluster1_pca <- principal(visitIntegrated.scale, nfactors = 2, rotate = "none", scores = TRUE)
options(digits = 2)
kable(cluster1_pca$Structure[1:8,1:2],align = 'c',caption = '<center>Table 2. Correlations between variables and components <center/>') %>%
kable_classic(full_width = F)%>%
kable_styling(position = "center")%>%
scroll_box(width = "100%", height = "400px")
| PC1 | PC2 | |
|---|---|---|
| nightVisit | 0.89 | -0.44 |
| Dwell<5 | 0.64 | 0.75 |
| Dwell5-10 | 0.64 | 0.75 |
| Dwell11-20 | 0.79 | 0.57 |
| Dwell21-60 | 0.99 | 0.01 |
| Dwell61-120 | 0.95 | -0.29 |
| Dwell121-240 | 0.92 | -0.38 |
| Dwell>240 | 0.88 | -0.47 |
Based on the bias analysis above, we rectify SafeGraph data depending on its cluster / type. First, all visit counts with dwell time that is less than 5 minutes are removed. Also, for cluster 1, visit counts with dwell time that is between 5 and 10 minutes will be reduced referring to the related proportion in the normal cluster 3. On top of that, long visits that are longer than 2 hours in Cluster 2 will be trimmed into the appropriate ratio according to midnight visitors which can be treated as “residents”.
# cluster info
cluster <- visitIntegrated %>%
st_drop_geometry() %>%
dplyr::select(placekey, cluster)
# caculate the ratio to trim "Dwell5-10" for cluster 1
Sum <- visitIntegrated %>%
st_drop_geometry() %>%
mutate(Total = `Dwell<5`+`Dwell5-10`+`Dwell11-20`+`Dwell21-60`+`Dwell61-120`+`Dwell121-240`+`Dwell>240`) %>%
group_by(cluster) %>%
summarize(`Dwell5-10` = sum(`Dwell5-10`),
`Dwell<5` = sum(`Dwell<5`),
Total = sum(Total))
ratio5To10 <- Sum[1,]$`Dwell5-10`/Sum[1,]$Total
group1.dwell <- dwellTime %>%
st_drop_geometry() %>%
spread(key=dwellTimes, value = visitors) %>%
left_join(visitIntegrated %>% dplyr::select(placekey, cluster) %>% st_drop_geometry(), by="placekey") %>%
filter(cluster==1) %>%
mutate(Total = `<5`+`5-10`+`11-20`+`21-60`+`61-120`+`121-240`+`>240`,
`rectified5-10`=ifelse(Total*ratio5To10<=`5-10`,Total*ratio5To10,`5-10`),
`rectified5-10rate`=`rectified5-10`/Total,
`<5rate`=`<5`/Total)
group23.dwell <- dwellTime %>%
st_drop_geometry() %>%
spread(key=dwellTimes, value = visitors) %>%
mutate(Total = `<5`+`5-10`+`11-20`+`21-60`+`61-120`+`121-240`+`>240`,
`<5rate`=`<5`/Total)
# caculate the ratio to trim "Dwell121-240" and "Dwell>240" for cluster 2
ratio2h <- visitHour %>%
st_drop_geometry() %>%
spread(key = hour, value = visit) %>%
left_join(visitIntegrated %>%
st_drop_geometry() %>%
dplyr::select(placekey, cluster)) %>%
mutate(residents = (`1`+`2`+`3`+`4`+`5`)/5,
hourlySum = (`1`+`2`+`3`+`4`+`5`+`6`+`7`+`8`+`9`+`10`+`11`+`12`+`13`+`14`+`15`+`16`+`17`+`18`+`19`+`20`+`21`+`22`+`23`+`24`),
`ratio>2h` = 17*residents/hourlySum) %>%
dplyr::select(placekey, month, `ratio>2h`, cluster)
## Joining, by = "placekey"
# rectify 'visitHour'
visitHour <- visitHour %>%
left_join(ratio2h, by=c("placekey","month"))
visitHour <- visitHour %>%
filter(cluster==2) %>%
filter(hour<=7 | hour>=19) %>%
mutate(visit = visit*(1-`ratio>2h`)) %>%
rbind(
visitHour %>%
filter(cluster!=2)
) %>%
rbind(
visitHour %>%
filter(cluster==2) %>%
filter(hour>7 & hour<19)
) %>%
dplyr::select(-`ratio>2h`, -cluster)
# rectify 'dwellTime'
dwellTime <- dwellTime %>%
left_join(ratio2h, by=c("placekey","month"))
dwellTime <- dwellTime %>%
filter(cluster==2) %>%
filter(dwellTimes=="121-240" | dwellTimes==">240") %>%
mutate(visitors = visitors*(1-`ratio>2h`)) %>%
rbind(dwellTime %>%
filter(cluster==2) %>%
filter(dwellTimes!="121-240" & dwellTimes!=">240")) %>%
rbind(dwellTime %>%
filter(cluster==1) %>%
filter(dwellTimes=="5-10") %>%
left_join(group1.dwell %>% dplyr::select(placekey,month,`rectified5-10`),by=c("placekey","month")) %>%
mutate(visitors = `rectified5-10`) %>%
dplyr::select(-`rectified5-10`)) %>%
rbind(dwellTime %>%
filter(cluster==1) %>%
filter(dwellTimes!="5-10")) %>%
rbind(dwellTime %>%
filter(cluster==3)) %>%
dplyr::select(-`ratio>2h`, -cluster)
dwellTime <- dwellTime %>%
filter(dwellTimes!='<5')
# rectify 'visitCount'
visitCount <- visitCount %>%
left_join(ratio2h, by=c("placekey","month"))
visitCount <- visitCount %>%
filter(cluster==1) %>%
left_join(group1.dwell,by=c("placekey","month")) %>%
mutate(visits = visits*(1-`rectified5-10rate`-`<5rate`)) %>%
dplyr::select(placekey,visitDay,day,visits,month,geometry) %>%
rbind(visitCount %>%
filter(cluster==2)%>%
left_join(group23.dwell,by=c("placekey","month")) %>%
left_join(ratio2h, by=c("placekey","month")) %>%
mutate(visits = visits*(1-`<5rate`-`ratio>2h.y`)) %>%
dplyr::select(placekey,visitDay,day,visits,month,geometry)) %>%
rbind(visitCount %>%
filter(cluster==3) %>%
left_join(group23.dwell,by=c("placekey","month")) %>%
mutate(visits = visits*(1-`<5rate`)) %>%
dplyr::select(placekey,visitDay,day,visits,month,geometry)) %>%
mutate(visits = round(visits,0))
sumVisit <- visitCount %>%
dplyr::select(-visitDay,-day,-month) %>%
group_by(placekey) %>%
summarise(visits=sum(visits))
ggplot(sumVisit)+
geom_sf(data=pprServiceArea,color='black',size=0.25,fill= "transparent")+
geom_sf(data=pprServiceArea %>% filter(PPR_DIST %in% c(7,8,9)),color='black',size=0.5,fill= "transparent")+
geom_sf(aes(size = visits),color = palette1_main,fill = palette1_main,alpha = 0.3) +
scale_size_continuous(range = c(1, 3),name = "Visits")+
mapTheme()+
theme(legend.position = "bottom",
legend.key.width = unit(0.5, 'cm'),
legend.key.height = unit(0.2, 'cm'))
Figure. Map of total device visits
sumVisitServiceArea <- pprServiceArea %>%
st_join(sumVisit,left =TRUE) %>%
drop_na(INFO) %>%
dplyr::select(INFO,visits,geometry) %>%
group_by(INFO) %>%
mutate(totalVisits=sum(visits)) %>%
dplyr::select(-visits) %>%
distinct() %>%
st_sf() %>%
st_transform(crs=crs)
ggplot(sumVisitServiceArea)+
geom_sf(aes(fill = q5(totalVisits)),color = 'white',size=0.5) +
scale_fill_manual(values = palette5,labels = qBr(sumVisitServiceArea,'totalVisits'),name = "Total Device Visits") +
mapTheme()+
theme(legend.position = "bottom",
legend.key.width = unit(0.5, 'cm'),
legend.key.height = unit(0.2, 'cm'))
Figure. Map of total device visits
# CensusData <-
# get_acs(geography = "block group",
# variables = c("B01003_001E"),
# year=2019, state="PA", county="Philadelphia", geometry=T, output="wide") %>%
# st_transform(crs=4326) %>%
# dplyr::select(-NAME,-starts_with('B')) %>%
# st_centroid() %>%
# as.data.frame() %>%
# rename("originGeometry" = "geometry")
#
# placekeyGeometry <-
# originCount %>%
# dplyr::select(placekey) %>%
# group_by(placekey) %>% summarise()
#
# orgCountPlot <- originCount %>%
# st_drop_geometry() %>%
# group_by(origin,placekey) %>%
# summarise(visitors=sum(visitors))%>%
# left_join(placekeyGeometry,by=c("placekey" = "placekey")) %>%
# rename("parkGeometry" = "geometry")%>%
# filter(startsWith(origin,"42101"))%>%
# left_join(CensusData,by=c("origin" = "GEOID"))%>%
# mutate(park.y=as.numeric(map(parkGeometry,function(x){return(x[2])})),
# park.x=as.numeric(map(parkGeometry,function(x){return(x[1])})),
# origin.y=as.numeric(map(originGeometry,function(x){return(x[2])})),
# origin.x=as.numeric(map(originGeometry,function(x){return(x[1])})))
#
# st_write(orgCountPlot,"data/output/orgCountPlot.GEOJSON")
orgCountPlot <- st_read("data/output/orgCountPlot.GEOJSON")
## Reading layer `orgCountPlot' from data source
## `C:\PENN\2022\MUSA_801_Practicum\MUSA_801_PPR\data\output\orgCountPlot.GEOJSON'
## using driver `GeoJSON'
## Simple feature collection with 105487 features and 7 fields (with 1287 geometries empty)
## Geometry type: GEOMETRY
## Dimension: XY
## Bounding box: xmin: -75 ymin: 40 xmax: -75 ymax: 40
## Geodetic CRS: WGS 84
orgCountPlot2 <- orgCountPlot%>%
filter(visitors>200)
orgCountPlotHF <- orgCountPlot%>%
filter(visitors>2000)
ggplot(data = orgCountPlot2) +
geom_sf(data = pprServiceArea %>% st_transform(crs=4326),fill ="transparent", color="black",size=0.5)+
geom_sf(data=pprDistrict %>% st_transform(crs=4326),fill ="transparent", color="black",size=1)+
geom_curve(aes(x = origin.x, y = origin.y,xend = park.x,yend = park.y,color = q5(visitors)),
size = 0.5,
curvature = -0.2,
alpha = 0.4,)+
scale_color_manual(values = palette5,
labels = qBr(orgCountPlot2,"visitors"),
name = "Device Visits (Quintile Breaks)") +
labs(x="",y="")+
mapTheme()+
theme(legend.position = "bottom",panel.spacing = unit(6, 'lines'),
legend.key.width = unit(0.5, 'cm'),
legend.key.height = unit(0.2, 'cm'))
Figure. Flow map of parks and origins
ggplot(data = orgCountPlotHF) +
geom_sf(data = pprServiceArea %>% st_transform(crs=4326),fill ="transparent", color="black",size=1)+
geom_sf(data=pprDistrict %>% st_transform(crs=4326),fill ="transparent", color="black",size=2)+
geom_curve(aes(x = origin.x,y = origin.y,xend = park.x,yend = park.y),
size = 1,color = palette1_main,curvature = -0.2, alpha = 0.4)+
labs(x="",y="")+
mapTheme()+
theme(legend.position = "bottom",panel.spacing = unit(6, 'lines'),
legend.key.width = unit(0.5, 'cm'),
legend.key.height = unit(0.2, 'cm'))
Figure. Flow map of parks and origins - High Frequency
# depaCountPlot <- departCount %>%
# st_drop_geometry() %>%
# group_by(departure,placekey) %>%
# summarise(visitors=sum(visitors))%>%
# left_join(placekeyGeometry,by=c("placekey" = "placekey")) %>%
# rename("parkGeometry" = "geometry")%>%
# filter(startsWith(departure,"42101"))%>%
# left_join(CensusData,by=c("departure" = "GEOID"))%>%
# mutate(park.y=as.numeric(map(parkGeometry,function(x){return(x[2])})),
# park.x=as.numeric(map(parkGeometry,function(x){return(x[1])})),
# departure.y=as.numeric(map(originGeometry,function(x){return(x[2])})),
# departure.x=as.numeric(map(originGeometry,function(x){return(x[1])})))
#
# st_write(depaCountPlot,"data/output/depaCountPlot.GEOJSON")
depaCountPlot <- st_read("data/output/depaCountPlot.GEOJSON")
## Reading layer `depaCountPlot' from data source
## `C:\PENN\2022\MUSA_801_Practicum\MUSA_801_PPR\data\output\depaCountPlot.GEOJSON'
## using driver `GeoJSON'
## Simple feature collection with 98685 features and 7 fields (with 1219 geometries empty)
## Geometry type: GEOMETRY
## Dimension: XY
## Bounding box: xmin: -75 ymin: 40 xmax: -75 ymax: 40
## Geodetic CRS: WGS 84
depaCountPlot2 <- depaCountPlot%>%
filter(visitors>200)
depaCountPlotHF <- depaCountPlot%>%
filter(visitors>2000)
ggplot(data = depaCountPlot2) +
geom_sf(data = pprServiceArea %>% st_transform(crs=4326),fill ="transparent", color="black",size=0.5)+
geom_sf(data=pprDistrict %>% st_transform(crs=4326),fill ="transparent", color="black",size=1)+
geom_curve(aes(x = departure.x, y = departure.y, xend = park.x, yend = park.y,color = q5(visitors)),
size = 0.5,curvature = -0.2, alpha = 0.4)+
scale_color_manual(values = palette5,
labels = qBr(depaCountPlot2,"visitors"),
name = "Device Visits (Quintile Breaks)") +
labs(x="",y="")+
mapTheme()+
theme(legend.position = "bottom",panel.spacing = unit(6, 'lines'),
legend.key.width = unit(0.5, 'cm'),
legend.key.height = unit(0.2, 'cm'))
Figure. Flow map of parks and departure points
ggplot(data = depaCountPlotHF) +
geom_sf(data = pprServiceArea %>% st_transform(crs=4326),fill ="transparent", color="black",size=0.5)+
geom_sf(data=pprDistrict %>% st_transform(crs=4326),fill ="transparent", color="black",size=1)+
geom_curve(aes(x = departure.x, y = departure.y, xend = park.x, yend = park.y,color = q5(visitors)),
size = 1,curvature = -0.2, alpha = 0.4)+
scale_color_manual(values = palette5,
labels = qBr(depaCountPlot2,"visitors"),
name = "Device Visits (Quintile Breaks)") +
labs(x="",y="")+
mapTheme()+
theme(legend.position = "bottom",panel.spacing = unit(6, 'lines'),
legend.key.width = unit(0.5, 'cm'),
legend.key.height = unit(0.2, 'cm'))
Figure. Flow map of parks and departure - High Frequency
dwellTimeForPlot <- dwellTime %>%
mutate(dwellTimes = recode(dwellTimes,
"<5" = 2.5,
"5-10" = 7.5,
"11-20" = 15,
"21-60" = 40,
"61-120" = 90,
"121-240" = 180,
">240" = 0)) %>%
mutate(sepTotalDwellTime = (visitors*dwellTimes)) %>%
group_by(placekey) %>%
mutate(totalVisitors=sum(visitors) )%>%
filter(totalVisitors>50) %>%
mutate(avgDwellTime=sum(sepTotalDwellTime)/totalVisitors) %>%
dplyr::select(placekey,avgDwellTime) %>%
distinct()
ggplot(dwellTimeForPlot)+
geom_sf(data=pprServiceArea,color='black',size=0.25,fill= "transparent")+
geom_sf(data=pprDistrict,color='black',size=0.75,fill='transparent')+
geom_sf(aes(size = avgDwellTime,color = avgDwellTime),alpha = 0.5) +
scale_size_continuous(range = c(0,2),name = "avgDwellTime")+
scale_color_continuous(low = '#FFDEDB',high ='#FF2903',
name = "avgDwellTime") +
mapTheme()+
theme(legend.position = "bottom",
legend.key.width = unit(0.5, 'cm'),
legend.key.height = unit(0.2, 'cm'))
Figure. Map of dwell time
dwellTImePlotServiceArea <- pprServiceArea %>%
st_join(dwellTimeForPlot,left =TRUE) %>%
drop_na(INFO) %>%
dplyr::select(INFO,avgDwellTime,geometry) %>%
group_by(INFO) %>%
mutate(avgDwellTime=mean(avgDwellTime)) %>%
distinct() %>%
st_sf() %>%
st_transform(crs=crs)
ggplot(dwellTImePlotServiceArea)+
geom_sf(aes(fill = q5(avgDwellTime)),color = 'white',size=0.5) +
scale_fill_manual(values = palette5,labels = qBr(dwellTImePlotServiceArea,'avgDwellTime'),name = "Average Device Dwelling Time") +
mapTheme()+
theme(legend.position = "bottom",
legend.key.width = unit(0.5, 'cm'),
legend.key.height = unit(0.2, 'cm'))
Figure. Map of average dwelling time
# calculate mean dwell time
dwellMean <- dwellTime %>%
st_drop_geometry() %>%
mutate(dwell = case_when(dwellTimes=="5-10" ~ 7.5,
dwellTimes=="11-20" ~ 15.5,
dwellTimes=="21-60" ~ 40.5,
dwellTimes=="61-120" ~ 90.5,
dwellTimes=="121-240" ~ 180.5,
dwellTimes==">240" ~ 270),
dwellMuti = dwell*visitors) %>%
group_by(placekey, month) %>%
summarize(dwellMuti = sum(dwellMuti),
visitors = sum(visitors)) %>%
mutate(meanDwell = dwellMuti/visitors)
## `summarise()` has grouped output by 'placekey'. You can override using the `.groups` argument.
# integrate dwell, total counts and permit
sgIntegrated_byMonth <- full_join(dwellMean %>% dplyr::select(placekey,month,meanDwell),
visitCount %>%
st_drop_geometry() %>%
group_by(placekey, month) %>%
summarize(visits = sum(visits)),
by=c('placekey','month')) %>%
na.omit() %>%
ungroup() %>%
left_join(propertyWithPlaceKey %>% dplyr::select(placekey, OBJECTID)) %>%
left_join(permit2021.join %>%
group_by(PPR_Properties_ObjectID) %>%
summarize(permits = n()),
by=c("OBJECTID"="PPR_Properties_ObjectID")) %>%
na.omit() %>%
st_sf() %>%
dplyr::select(-OBJECTID) %>%
group_by(placekey, month) %>%
summarize(permits = sum(permits),
visits = mean(visits),
meanDwell = mean(meanDwell)) %>%
ungroup()
## `summarise()` has grouped output by 'placekey'. You can override using the `.groups` argument.
## Joining, by = "placekey"
## `summarise()` has grouped output by 'placekey'. You can override using the `.groups` argument.
# normalize
sgIntegrated_byMonth.scale <- scale(sgIntegrated_byMonth %>%
dplyr::select(-placekey,-month) %>%
st_drop_geometry())
set.seed(1234)
# decide cluster number (only run once)
# nc <- NbClust(sgIntegrated_byMonth.scale, min.nc=2, max.nc=15, method="kmeans", index="all")
# table(nc$Best.n[1,])
#
# barplot(table(nc$Best.n[1,]),
# xlab="Numer of Clusters", ylab="Number of Criteria",
# main="Number of Clusters Chosen by 26 Criteria")
# Run K-Means cluster
cluster2 <- kmeans(sgIntegrated_byMonth.scale, 4)
summary(cluster2)
## Length Class Mode
## cluster 948 -none- numeric
## centers 12 -none- numeric
## totss 1 -none- numeric
## withinss 4 -none- numeric
## tot.withinss 1 -none- numeric
## betweenss 1 -none- numeric
## size 4 -none- numeric
## iter 1 -none- numeric
## ifault 1 -none- numeric
# add cluster number back
sgIntegrated_byMonth <- sgIntegrated_byMonth %>%
mutate(cluster = cluster2$cluster)
# mean by cluster
cluster2_mean <- aggregate(sgIntegrated_byMonth %>%
dplyr::select(-placekey, -cluster,-month) %>%
st_drop_geometry(),
by=list(cluster=sgIntegrated_byMonth$cluster),
FUN=mean) %>%
left_join(sgIntegrated_byMonth %>%
st_drop_geometry() %>%
group_by(cluster) %>%
summarize(size = n()),
by="cluster")
kable(cluster2_mean,align = 'c',caption = '<center>Table 3. Mean values of clusters for conflicts <center/>') %>%
kable_classic(full_width = F)%>%
kable_styling(position = "center")%>%
scroll_box(width = "100%", height = "400px")
| cluster | permits | visits | meanDwell | size |
|---|---|---|---|---|
| 1 | 12 | 471 | 68 | 414 |
| 2 | 48 | 587 | 77 | 187 |
| 3 | 12 | 411 | 110 | 289 |
| 4 | 12 | 9855 | 191 | 58 |
## put histograms on the diagonal
panel.hist <- function(x, ...)
{
usr <- par("usr"); on.exit(par(usr))
par(usr = c(usr[1:2], 0, 1.5) )
h <- hist(x, plot = FALSE)
breaks <- h$breaks; nB <- length(breaks)
y <- h$counts; y <- y/max(y)
rect(breaks[-nB], 0, breaks[-1], y, ...)
}
#Color points by groups
my_cols <- palette4[1:4]
pairs(sgIntegrated_byMonth %>%
st_drop_geometry() %>%
dplyr::select(meanDwell,visits,permits),
pch = 19, cex = 1, diag.panel = panel.hist,
col = my_cols[sgIntegrated_byMonth$cluster],
lower.panel=NULL, panel = panel.smooth)
Figure. Correlation Matrix of Conflicts between Mobility Data $ Activities
sgIntegrated_byMonth %>%
st_drop_geometry() %>%
filter(cluster == 2) %>%
group_by(placekey) %>%
summarize(howManyMonthBelongToThisCluster=n()) %>%
kable(.,align = 'c') %>%
kable_classic(full_width = F)%>%
kable_styling(position = "center")%>%
scroll_box(width = "100%", height = "400px")
| placekey | howManyMonthBelongToThisCluster |
|---|---|
| 222-222@628-pgm-jgk | 11 |
| zzw-222@628-pg9-h5z | 11 |
| zzw-222@628-pm4-fpv | 11 |
| zzw-222@628-pm7-rtv | 11 |
| zzw-222@628-pm8-ghq | 11 |
| zzw-223@628-pgc-g6k | 5 |
| zzy-222@628-pg8-r6k | 11 |
| zzy-222@63s-dwd-kxq | 1 |
| zzz-222@628-pg9-hwk | 11 |
| zzz-222@628-pgf-kcq | 11 |
| zzz-222@628-pgk-wff | 11 |
| zzz-222@628-pgm-jgk | 11 |
| zzz-222@628-pm3-xyv | 2 |
| zzz-222@628-pm4-fpv | 11 |
| zzz-222@628-pm7-3kf | 11 |
| zzz-222@628-pm8-vj9 | 11 |
| zzz-222@628-pm9-28v | 11 |
| zzz-222@63s-dvr-yy9 | 9 |
| zzz-222@63s-dvw-sh5 | 11 |
| zzz-222@63s-dwg-28v | 5 |
sgIntegrated_byMonth %>%
st_drop_geometry() %>%
filter(cluster == 4) %>%
group_by(placekey) %>%
summarize(size=n()) %>%
kable(.,align = 'c') %>%
kable_classic(full_width = F)%>%
kable_styling(position = "center")%>%
scroll_box(width = "100%", height = "400px")
| placekey | size |
|---|---|
| 222-223@628-pgk-cbk | 1 |
| zzw-222@628-pg9-tgk | 11 |
| zzw-222@63s-dvx-cyv | 2 |
| zzz-222@628-pg9-tgk | 7 |
| zzz-222@628-pgm-9cq | 1 |
| zzz-222@628-ph6-r49 | 5 |
| zzz-222@628-pm7-nnq | 9 |
| zzz-222@628-pm9-jn5 | 11 |
| zzz-222@63s-dvq-5fz | 11 |
cluster1 & 3 : Comparatively normal positions. The dwell pattern is different for these two clusters.
cluster 2 (choose one place with a howManyMonthBelongToThisCluster of 11, there are many of them): normal performance on sg data (dwell and visits), but with many permit plans
cluster4 ‘zzz-222@63s-dvq-5fz’ ‘zzz-222@628-pm9-jn5’ ‘zzw-222@628-pg9-tgk’ (choose one or more): perform well in sg data (dwell and visits), but not more permit plans
These should come from the outcome of clustering Analysis
visitCount789 <-
st_join(visitCount %>% st_transform(crs=4326),pprServiceArea%>% st_transform(crs=4326),left=TRUE) %>%
filter(PPR_DIST %in% c(7,8,9))
visitCount789 <- visitCount789 %>%
dplyr::select(placekey,visits) %>%
st_drop_geometry() %>%
group_by(placekey) %>%
mutate(totalVisits = sum(visits)) %>%
dplyr::select(-visits) %>%
distinct()
Figure Matthias Baldwin Park
Figure Matthias Baldwin Park
sumVisit <- dwellTime %>%
filter(placekey=='zzz-222@63s-dvq-5fz') %>%
dplyr::select(-month) %>%
group_by(dwellTimes) %>%
summarise(visitors=sum(visitors)) %>%
st_drop_geometry()
sumVisit$dwellTimes <- factor(sumVisit$dwellTimes, levels= c("<5","5-10","11-20","21-60","61-120","121-240",">240" ))
sumVisit%>%
ggplot(aes(dwellTimes,visitors)) +
geom_bar(position ="dodge",fill = palette1_main,stat='identity') +
labs(x="Dwell Time", y="Aggregated Visitors",
title ='Christy Recreation Center (zzz-222@63s-dvq-5fz)') +
plotTheme(10,20)
Figure. Map of dwell time
visitCount %>%
filter(placekey=='zzz-222@63s-dvq-5fz') %>%
st_drop_geometry()%>%
na.omit()%>%
ggplot(aes(visitDay,visits)) +
geom_line(color=palette1_main,size=1)+
labs(title = 'Christy Recreation Center (zzz-222@63s-dvq-5fz)',x="Visit Date",y="Safegraph Visit")+
plotTheme(10,20)+
theme(panel.border = element_rect(colour = "black", fill=NA, size=1))
Figure. Map of dwell time
sumVisit <- visitHour %>%
filter(placekey=='zzz-222@63s-dvq-5fz') %>%
dplyr::select(-month) %>%
group_by(hour) %>%
summarise(visits=sum(visit)) %>%
st_drop_geometry()
sumVisit%>%
ggplot(aes(hour,visits)) +
geom_bar(position ="dodge",fill = palette1_main,stat='identity') +
labs(x="hour", y="Aggregated Visits",
title ='Christy Recreation Center (zzz-222@63s-dvq-5fz)') +
plotTheme(10,20)
Figure. Map of visit time
And its reasons analysis
These should come from the outcome of clustering Analysis
visitCount789 <-
st_join(visitCount %>% st_transform(crs=4326),pprServiceArea%>% st_transform(crs=4326),left=TRUE) %>%
filter(PPR_DIST %in% c(7,8,9))
visitCount789 <- visitCount789 %>%
dplyr::select(placekey,visits) %>%
st_drop_geometry() %>%
group_by(placekey) %>%
mutate(totalVisits = sum(visits)) %>%
dplyr::select(-visits) %>%
distinct()
Figure Matthias Baldwin Park
Figure Matthias Baldwin Park
sumVisit <- dwellTime %>%
filter(placekey=='zzz-222@63s-dvq-5fz') %>%
dplyr::select(-month) %>%
group_by(dwellTimes) %>%
summarise(visitors=sum(visitors)) %>%
st_drop_geometry()
sumVisit$dwellTimes <- factor(sumVisit$dwellTimes, levels= c("<5","5-10","11-20","21-60","61-120","121-240",">240" ))
sumVisit%>%
ggplot(aes(dwellTimes,visitors)) +
geom_bar(position ="dodge",fill = palette1_main,stat='identity') +
labs(x="Dwell Time", y="Aggregated Visitors",
title ='Christy Recreation Center (zzz-222@63s-dvq-5fz)') +
plotTheme(10,20)
Figure. Map of dwell time
visitCount %>%
filter(placekey=='zzz-222@63s-dvq-5fz') %>%
st_drop_geometry()%>%
na.omit()%>%
ggplot(aes(visitDay,visits)) +
geom_line(color=palette1_main,size=1)+
labs(title = 'Christy Recreation Center (zzz-222@63s-dvq-5fz)',x="Visit Date",y="Safegraph Visit")+
plotTheme(10,20)+
theme(panel.border = element_rect(colour = "black", fill=NA, size=1))
Figure. Map of dwell time
sumVisit <- visitHour %>%
filter(placekey=='zzz-222@63s-dvq-5fz') %>%
dplyr::select(-month) %>%
group_by(hour) %>%
summarise(visits=sum(visit)) %>%
st_drop_geometry()
sumVisit%>%
ggplot(aes(hour,visits)) +
geom_bar(position ="dodge",fill = palette1_main,stat='identity') +
labs(x="hour", y="Aggregated Visits",
title ='Christy Recreation Center (zzz-222@63s-dvq-5fz)') +
plotTheme(10,20)
Figure. Map of visit time
And its reasons analysis
These should come from the outcome of clustering Analysis
visitCount789 <-
st_join(visitCount %>% st_transform(crs=4326),pprServiceArea%>% st_transform(crs=4326),left=TRUE) %>%
filter(PPR_DIST %in% c(7,8,9))
visitCount789 <- visitCount789 %>%
dplyr::select(placekey,visits) %>%
st_drop_geometry() %>%
group_by(placekey) %>%
mutate(totalVisits = sum(visits)) %>%
dplyr::select(-visits) %>%
distinct()
Figure Matthias Baldwin Park
Figure Matthias Baldwin Park
sumVisit <- dwellTime %>%
filter(placekey=='zzz-222@63s-dvq-5fz') %>%
dplyr::select(-month) %>%
group_by(dwellTimes) %>%
summarise(visitors=sum(visitors)) %>%
st_drop_geometry()
sumVisit$dwellTimes <- factor(sumVisit$dwellTimes, levels= c("<5","5-10","11-20","21-60","61-120","121-240",">240" ))
sumVisit%>%
ggplot(aes(dwellTimes,visitors)) +
geom_bar(position ="dodge",fill = palette1_main,stat='identity') +
labs(x="Dwell Time", y="Aggregated Visitors",
title ='Christy Recreation Center (zzz-222@63s-dvq-5fz)') +
plotTheme(10,20)
Figure. Map of dwell time
visitCount %>%
filter(placekey=='zzz-222@63s-dvq-5fz') %>%
st_drop_geometry()%>%
na.omit()%>%
ggplot(aes(visitDay,visits)) +
geom_line(color=palette1_main,size=1)+
labs(title = 'Christy Recreation Center (zzz-222@63s-dvq-5fz)',x="Visit Date",y="Safegraph Visit")+
plotTheme(10,20)+
theme(panel.border = element_rect(colour = "black", fill=NA, size=1))
Figure. Map of dwell time
sumVisit <- visitHour %>%
filter(placekey=='zzz-222@63s-dvq-5fz') %>%
dplyr::select(-month) %>%
group_by(hour) %>%
summarise(visits=sum(visit)) %>%
st_drop_geometry()
sumVisit%>%
ggplot(aes(hour,visits)) +
geom_bar(position ="dodge",fill = palette1_main,stat='identity') +
labs(x="hour", y="Aggregated Visits",
title ='Christy Recreation Center (zzz-222@63s-dvq-5fz)') +
plotTheme(10,20)
Figure. Map of visit time
And its reasons analysis
Jeff Dashboard